RRPlot and the Colon data set

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data set

data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
309 312
pander::pander(table(dataColonTest$status))
0 1
133 134

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)

[++-++++-+++-++-+++++-+++++-++++++]..

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
node4 2.96e-01 1.206 1.345 1.500 0.605
age_nodes -1.31e-05 1.000 1.000 1.000 0.610
age_node4 3.37e-03 1.002 1.003 1.005 0.605
nodes 3.83e-02 1.017 1.039 1.062 0.609
rxLev_5FU_age -2.51e-03 0.996 0.997 0.999 0.557
rxLev_5FU -1.20e-01 0.822 0.887 0.957 0.557
adhere 1.39e-01 1.042 1.149 1.268 0.539
age -1.16e-03 0.998 0.999 1.000 0.519
age_adhere 1.14e-03 1.000 1.001 1.002 0.539
extent 9.35e-02 1.022 1.098 1.179 0.541
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
node4 0.579 0.618 0.607 0.578 0.619
age_nodes 0.557 0.616 0.611 0.557 0.617
age_node4 0.598 0.611 0.607 0.598 0.612
nodes 0.586 0.618 0.609 0.586 0.618
rxLev_5FU_age 0.627 0.618 0.556 0.628 0.619
rxLev_5FU 0.611 0.611 0.556 0.612 0.612
adhere 0.608 0.620 0.541 0.609 0.620
age 0.615 0.618 0.519 0.616 0.619
age_adhere 0.617 0.621 0.541 0.618 0.621
extent 0.604 0.606 0.539 0.605 0.607
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
node4 0.03866 0.414 5.75 6.08 4.12e-02 1.0
age_nodes 0.02188 0.387 4.94 5.20 6.01e-02 1.0
age_node4 0.02538 0.390 4.54 5.76 1.39e-02 1.0
nodes 0.01669 0.274 3.94 3.61 3.27e-02 1.0
rxLev_5FU_age 0.01262 0.225 3.25 3.00 -8.76e-03 1.0
rxLev_5FU 0.01125 0.225 3.10 3.00 -4.97e-05 1.0
adhere 0.00732 0.169 2.79 3.01 1.15e-02 1.0
age 0.00841 0.146 2.77 1.82 2.83e-03 0.2
age_adhere 0.00650 0.212 2.66 3.52 3.46e-03 0.6
extent 0.00897 0.150 2.56 2.70 2.48e-03 0.8

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100
Thr 0.497 0.332 0.253 0.22072
RR 1.622 1.833 2.277 1.00000
SEN 0.276 0.631 0.955 1.00000
SPE 0.896 0.667 0.149 0.00324
BACC 0.586 0.649 0.552 0.50162
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.995 0.888 1.11
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.51 1.51 1.48 1.54
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.36 1.36 1.35 1.36
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.649 0.649 0.62 0.678
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.685 0.643 0.726
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.276 0.227 0.329
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.861 0.931
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100
0.497 0.332 0.253 0.221
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.65 1.43 1.91
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 53.712276 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 504 226 270 7.17 53.7
class=1 117 86 42 46.07 53.7

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.685 1.52 3.04

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100
Thr 0.649 0.459 0.359 0.31610
RR 1.622 1.833 2.277 1.00000
SEN 0.276 0.631 0.955 1.00000
SPE 0.896 0.667 0.149 0.00324
BACC 0.586 0.649 0.552 0.50162
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.763 0.681 0.853
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
0.964 0.964 0.947 0.979
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.04 1.04 1.04 1.04
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.649 0.649 0.619 0.68
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.685 0.643 0.726
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.276 0.227 0.329
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.861 0.931
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100
0.649 0.459 0.359 0.316
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.65 1.43 1.91
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 53.712276 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 504 226 270 7.17 53.7
class=1 117 86 42 46.07 53.7

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values (continued below)
  @:0.648769528990332 @:0.459009260615184 @:0.359085607123185
Thr 0.644 0.458 0.359
RR 1.687 1.618 10.769
SEN 0.328 0.627 0.993
SPE 0.880 0.609 0.143
BACC 0.604 0.618 0.568
  @:0.316100113104116 @MAX_BACC @MAX_RR @SPE100
Thr 0.3515 0.435 0.362 0.3515
RR 63.0588 2.257 13.136 63.0588
SEN 1.0000 0.836 0.993 1.0000
SPE 0.0902 0.451 0.173 0.0902
BACC 0.5451 0.643 0.583 0.5451
pander::pander(t(rrAnalysisTest$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.812 0.681 0.962
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.03 1.03 1.01 1.05
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.04 1.04 1.03 1.04
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.671 0.672 0.631 0.719
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.632 0.757
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.328 0.25 0.415
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.887 0.821 0.935
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_max_BACC at_max_RR atSPE100
0.649 0.459 0.359 0.316 0.435 0.362 0.352
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.69 1.36 2.1
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 37.292956 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 132 51 75.3 7.84873 18.04578
class=1 76 39 38.7 0.00177 0.00249
class=2 59 44 19.9 28.99644 34.44194

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[++++++-].[++-].[++++].[++++-].[++++-].[+++++].[++-].[+++-].[+++].[+++]10 Tested: 856 Avg. Selected: 8 Min Tests: 1 Max Tests: 10 Mean Tests: 4.941589 . MAD: 0.4764983

.[++++].[++++-].[++++-].[++++-].[++].[+++++].[+++-].[++++].[++++-].[++]20 Tested: 887 Avg. Selected: 8.3 Min Tests: 1 Max Tests: 20 Mean Tests: 9.537768 . MAD: 0.4765619

.[++++-].[++++].[++++-].[+++].[++++-].[+++-].[+++++++].[+++].[++++-].[+++-]30 Tested: 888 Avg. Selected: 8.2 Min Tests: 1 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4768923

.[++++].[++++-].[++-].[++-].[++++].[++++-].[+++].[+++-].[+++++++].[+++++]40 Tested: 888 Avg. Selected: 8.05 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.476948

.[+++].[++-].[++++].[+++].[++++-].[++++-].[+++].[++++-].[++++].[+++]50 Tested: 888 Avg. Selected: 8.1 Min Tests: 4 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4763361

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
Threshold values (continued below)
  @:0.648769528990332 @:0.459009260615184 @:0.359085607123185
Thr 0.649 0.459 0.35811
RR 1.599 1.727 1.51020
SEN 0.300 0.742 0.99552
SPE 0.878 0.493 0.00905
BACC 0.589 0.618 0.50228
  @:0.316100113104116 @MAX_BACC @MAX_RR @SPE100
Thr 0.35354 0.463 0.418 0.35354
RR 1.00000 1.767 2.204 1.00000
SEN 1.00000 0.731 0.919 1.00000
SPE 0.00452 0.520 0.244 0.00452
BACC 0.50226 0.626 0.582 0.50226
pander::pander(t(rrAnalysisCVTest$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.756 0.687 0.829
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
0.93 0.929 0.917 0.943
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
0.999 0.999 0.997 1
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.646 0.646 0.623 0.671
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.672 0.637 0.707
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.3 0.258 0.345
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.878 0.844 0.907
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_max_BACC at_max_RR atSPE100
0.649 0.459 0.359 0.316 0.463 0.418 0.354
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.6 1.42 1.81
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 98.670141 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 334 115 196.8 33.97 61.2
class=1 366 197 180.6 1.49 2.5
class=2 188 134 68.6 62.26 74.1